This page last changed on Apr 19, 2006 by dblasby.
Summary

We are going to create two "derived" datasets from the base TIGER datasets.
The first is a set of landmarks demarked by points ("point_landmarks")
The second is a set of landmarks demarked by polygons ("poly_landmarks).

NOTE: there could be multiple names for one landmark (same 'module' and 'land' id but different laname). This happens 294 times.

Area Landmarks


Example of TIGER area landmarks in New York City.

Input Datasets and Process

1. poly2 - the basic TIGER polygons
2. landmarks - definition of ALL landmarks including landmark name ("laname") and the type code "cfcc".
3. arealandmarks - for each area landmark, this contains the list of "basic" polygons that are dissolved together.

The process is simple:
a) create area_landmarks_tmp which combines the information in the landmark and arealandmarks tables
b) for each area polygon, dissolve together all the sub-polygons that its made from
c) add extra information (see below)

Setup:

create index landmarks_idx_land on landmarks (land);
create index arealandmarks_idx_land on arealandmarks (land);
create index landmarks_idx_module on landmarks (module);
create index arealandmarks_idx_module on arealandmarks (module);

create index arealandmarks_idx_moduleland on arealandmarks (module,land);
create index landmarks_idx_moduleland on landmarks (module,land);

create index poly2_idx_moduleland on poly2 (module,polyid);

vacuum analyse landmarks;
vacuum analyse arealandmarks;
vacuum analyse poly2;

Step (a):

CREATE TABLE area_landmarks_tmp AS

SELECT landmarks.laname,
 landmarks.cfcc,
 landmarks.land,
 arealandmarks.module,
 arealandmarks.polyid

FROM landmarks,arealandmarks
 WHERE arealandmarks.land = landmarks.land
 AND arealandmarks.module = landmarks.module;
 
 -- CHECK:
--should be 0 (no bad data)
select count(*) from arealandmarks where land isnull or module isnull;

-- check to make sure that no landmarks disappeared
  select count(distinct module||land) from arealandmarks;
  select count(distinct module||land) from area_landmarks_tmp;
  select count(distinct module||land) from landmarks where wkb_geometry isnull; 

--should be same #

Step (b):

-- create a table with all the base polygons in a geometry collection
-- this takes a while!!!
CREATE TABLE poly_landmarks_tmp AS
 SELECT poly2.module,
 land,
 min(cfcc) as cfcc,
 substring(min(cfcc) from 1 for 1) as cfcc_1,
 substring(min(cfcc) from 2 for 1) as cfcc_2,
 substring(min(cfcc) from 3 for 1) as cfcc_3,
 min(laname) as laname,
 collect(the_geom)
 FROM area_landmarks_tmp,poly2
 WHERE area_landmarks_tmp.polyid = poly2.polyid
 AND   area_landmarks_tmp.module = poly2.module
 GROUP BY land,poly2.module;
 
 -- CHECK:
  -- should be the same
  SELECT count(*) from poly_landmarks_tmp;
  SELECT count(distinct(module||land)) FROM area_landmarks_tmp;
 
 -- should be the NEAR the same #
--  it will not be exactly because there some landmarks with multiple names
  SELECT sum( numgeometries(collect) ) FROM poly_landmarks_tmp;
  SELECT count(*) FROM arealandmarks;
 
 
  -- dissolve the geometry collection of base polygons
 alter table poly_landmarks_tmp add column buffer geometry;
 UPDATE poly_landmarks_tmp SET buffer=buffer(collect,0);

--check to make sure the results are valid geometries
SELECT module,land FROM poly_landmarks_tmp WHERE not(isvalid(buffer)) or buffer isnull;

-- CHECK:
 alter table poly_landmarks_tmp add column area_coll float8;
 alter table poly_landmarks_tmp add column area_buff float8;
 update poly_landmarks_tmp set area_coll = area(collect);
 update poly_landmarks_tmp set area_buff = area(buffer);

-- area is off by > 1 %  (likely a problem with the dissolving of polygons)
-- should return no rows
-- the 0.5 and 2/3 occurs when there are polygons on top of each other
-- like when there are multiple names for a single area landmark.
-- We do not worry about them.
     
 SELECT laname,module,land,(area_coll-area_buff)/area_coll as error
     FROM poly_landmarks_tmp
    WHERE abs(area_coll-area_buff)/area_coll >0.001
       AND (abs(area_coll-area_buff)/area_coll != 0.5) and (abs(area_coll-area_buff)/area_coll != 2.0/3.0)

Step (c):

-- augment the dataset with area.  We create an albers projection
-- centered on the polygon so we get an accurate area estimate

alter table poly_landmarks_tmp add column proj4 text;
update poly_landmarks_tmp set proj4 = '+proj=aea +lat_1='|| ymin(buffer)||' +lat_2='||ymax(buffer) || 
  ' +lat_0='|| ymin(buffer) ||' +lon_0='||xmin(buffer) ||
  ' +x_0=0 +y_0=0 +ellps=GRS80 +datum=NAD83 +units=m +no_defs';


alter table poly_landmarks_tmp add column buffer_aea geometry;
update poly_landmarks_tmp set buffer_aea = 
   transform_geometry(buffer,'+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs',proj4,3005);

alter table poly_landmarks_tmp add column area_sq_m float8;
update poly_landmarks_tmp set area_sq_m = area(buffer_aea);

--check (should be 0)
select count(*) from poly_landmarks_tmp where area_sq_m isnull or area_sq_m <=0;
select isvalid(buffer_aea) from poly_landmarks_tmp where not(isvalid(buffer_aea));



-- generate the final dataset (dont just change the name of the table,
-- by recreating it we save a HUGE amount of disk space)
CREATE TABLE poly_landmarks AS 
  SELECT module,land,laname, cfcc,cfcc_1,cfcc_2,cfcc_3,buffer as the_geom,area_sq_m
  FROM poly_landmarks_tmp;
  
--clean up
DROP TABLE area_landmarks_tmp;
DROP TABLE poly_landmarks_tmp;

INSERT INTO geometry_columns values ('','public','poly_landmarks','the_geom',2,1,'GEOMETRY');
CREATE INDEX polyland_idx_module on poly_landmarks (module);
CREATE INDEX polyland_idx_moduleland on poly_landmarks (module,land);
CREATE INDEX polyland_idx_spatial on poly_landmarks using gist (the_geom gist_geometry_ops);
VACUUM ANALYSE  poly_landmarks;
Final poly_landmarks dataset

module – name of county this is in
land – landmark id
laname – landmark name
cfcc – classification (type) of the landmark
cfcc_1 – first letter of cfcc classification (for easy labeling)
cfcc_2 – second letter of cfcc classification (for easy labeling)
cfcc_3 – third letter of cfcc classification (for easy labeling)
the_geom – polygon/multipolygon for the landmark
area_sq_m-- area of the polygon (in square meters)

SELECT cfcc,count(*) as nfeatures FROM poly_landmarks GROUP BY  cfcc ORDER BY nfeatures;


 cfcc | nfeatures
------+-----------
 H31  |    113691  Perennial lake or pond
 D82  |     65233  Cemetery
 H11  |     57427  Perennial stream or river
 D85  |     23902  State or local park or forest
 E23  |     10267  Island
 H51  |      7939  Bay, estuary, gulf, or sound
 H41  |      6715  Perennial reservoir
 H32  |      3156  Intermittent lake or pond
 D51  |      2699  Airport or airfield
 D10  |      2601  Military installation or reservation
 D81  |      2558  Golf course
 D43  |      1939  Educational institution
 D84  |      1875  National forest or other Federal land
 D83  |      1731  National Park Service land
 H12  |      1326  Intermittent stream, river, or wash
 H21  |      1237  Perennial canal, ditch, or aqueduct
 H53  |       874  Sea, or ocean
 D31  |       617  Hospital, urgent care facility, clinic
 D23  |       505  Trailer court or mobile home park
 H81  |       476  Glacier
 D57  |       450  Airport (inside a city)
 D28  |       429  Campground
 D64  |       372  Amusement center
 D37  |       340  prison
 D61  |       312  Shopping center
 D62  |       191  Industrial building
 D65  |       173  Government center
 D44  |       169  Religious institution,
 H42  |       159  Treatment pond
 D21  |       102  Apartment building
 D36  |        87  Jail or detention center
 H60  |        87  Gravel pit or quarry filled with water
 D20  |        80  Multihousehold or transient quarters
 D90  |        62  Special purpose landmark;
 H22  |        62  Intermittent canal
 D33  |        57  Nursing home
 D27  |        42  Hotel
 D66  |        40  Other employment center
 D00  |        38  Landmark
 D63  |        34  Office building
 D26  |        34  Housing facility for workers
 D35  |        27  Orphanage
 D24  |        27  Marina
 H00  |        26  Water feature, classification unknown
 D42  |        17  Educational institution
 D54  |        16  Marine terminal
 D52  |        15  Train station
 H13  |        13  Braided stream
 H50  |        11  Bay, estuary, gulf, sound, sea, or ocean
 H30  |        11  Lake or pond
 D34  |         7  County home
 D53  |         3  Bus terminal
 D45  |         2  Museum
 D55  |         2  Seaplane anchorage
 D80  |         2  Open space
 D29  |         2  Shelter or mission
 D60  |         1  Employment center
 D58  |         1  Park and ride facility/parking lot
 D32  |         1  Halfway house
 H80  |         1  Special water feature
 D88  |         1  Landfill
 D40  |         1  Educational, cultural, or religious institution

See the TIGER manual [here|http://docs.codehaus.org/download/attachments/46766/TGR05FE.pdf] for what the feature codes refer too.

Point landmarks.


Example of point landmarks.

This is easy:

DROP TABLE point_landmarks; -- clean up

CREATE TABLE point_landmarks AS
 SELECT wkb_geometry as the_geom,laname , module,land, 
     cfcc as cfcc,
     substring(cfcc from 1 for 1) as cfcc_1,
     substring(cfcc from 2 for 1) as cfcc_2,
     substring(cfcc from 3 for 1) as cfcc_3
 FROM landmarks
 WHERE not(wkb_geometry isnull) AND length(laname) != 0;
 
INSERT INTO geometry_columns values ('','public','point_landmarks','the_geom',2,1,'GEOMETRY');
 
CREATE INDEX  pointland_idx_module on point_landmarks (module);
CREATE INDEX pointland_idx_moduleland on point_landmarks (module,land);
CREATE INDEX pointland_idx_spatial on point_landmarks using gist (the_geom gist_geometry_ops);
VACUUM ANALYSE  point_landmarks;

poly_landmark.gif (image/gif)
point_landmark.gif (image/gif)
point_landmark.gif (image/gif)
Document generated by Confluence on Jan 16, 2008 23:28